{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cot import Collection\n",
    "from cot.stats import evaluation_as_table\n",
    "from numpy import loadtxt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33 = Collection.load_thoughtsource_33()\n",
    "# ts_100 = Collection.load_thoughtsource_100()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "913e146870304fec8ba08e62e8900e1c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "32d56f8dba354b16a1ceff569c414a0a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "07521bbaa52f45d7a671a63594522ffb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fa91d51486ad43b4bad120e364784aba",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8ca63be67cc94605aeba09b7b70c40b1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "748c23fd06f641918dd67aea1886b4d4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "e = ts_33.evaluate(overwrite=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['thoughtsource_33_cohere_command-xlarge-nightly_refl-01_None.json',\n",
       " 'thoughtsource_33_openai_chat_gpt-4_refl-01_None.json',\n",
       " 'thoughtsource_33_openai_chat_gpt-3.5-turbo_refl-01_None.json',\n",
       " 'thoughtsource_33_openai_text-davinci-002_refl-01_None.json',\n",
       " 'thoughtsource_33_openai_text-davinci-003_refl-01_None.json',\n",
       " 'thoughtsource_33_huggingface_endpoint_flan-T5-xxl_refl-01_None.json']"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "def list_files_with_prefix_and_suffix(prefix, suffix, directory=\".\"):\n",
    "    \"\"\"Return the sorted names of regular files matching a prefix and suffix.\n",
    "\n",
    "    Args:\n",
    "        prefix: Leading text every returned file name must start with.\n",
    "        suffix: Trailing text every returned file name must end with.\n",
    "        directory: Folder to scan; defaults to the current working directory.\n",
    "\n",
    "    Returns:\n",
    "        A list of bare file names (no directory part), sorted so that the\n",
    "        result does not depend on the file system's listing order.\n",
    "    \"\"\"\n",
    "    with os.scandir(directory) as entries:\n",
    "        # Skip sub-directories: the names are meant to be opened as files.\n",
    "        matches = [\n",
    "            entry.name\n",
    "            for entry in entries\n",
    "            if entry.is_file()\n",
    "            and entry.name.startswith(prefix)\n",
    "            and entry.name.endswith(suffix)\n",
    "        ]\n",
    "    return sorted(matches)\n",
    "\n",
    "# Call the function with the desired prefix and suffix\n",
    "filtered_files = list_files_with_prefix_and_suffix(\"thoughtsource_33\", \"json\")\n",
    "\n",
    "# Display the matching file names as the cell output\n",
    "filtered_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a7b9dab35f6a47d19dc0dc19fa6c072d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cee646d48e2847ad9935b4bd91030202",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "624d055057f24dbd9fde92b28f70448d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3fd6b689211e46b4b1c21e74a90596a9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d354f47455d94057a6466fb78cfbe395",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "67ed609106774d6cb8d88ff787eca2ac",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_merge = Collection.load_thoughtsource_33(load_pregenerated_cots=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for file in filtered_files:\n",
    "    # Load one listed JSON file, then fold it into the running collection.\n",
    "    loaded_collection = Collection.from_json(file)\n",
    "    ts_merge = ts_merge.merge(loaded_collection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "commonsense_qa {6}\n",
      "med_qa {6}\n",
      "medmc_qa {6}\n",
      "open_book_qa {6}\n",
      "strategy_qa {6}\n",
      "worldtree {6}\n"
     ]
    }
   ],
   "source": [
    "ts_merge.number_generated_cots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "commonsense_qa {63}\n",
      "med_qa {66}\n",
      "medmc_qa {66}\n",
      "open_book_qa {61}\n",
      "strategy_qa {63}\n",
      "worldtree {61}\n"
     ]
    }
   ],
   "source": [
    "ts_33.number_generated_cots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5acc4a2a49e749f6a1bd3b5cda5bc9c7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8392a506e3044728a3b6b5ab20094e67",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d0043b879f24d6d860afae6aa2b1193",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "25724e56b34743d4ba0c7750cccf654c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f6f7442fe47b4117905ce106f94322f4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "35d6189e66d04dbca7b2f8c609eaac83",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4c76e695ede849c4821b67f0d538a501",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "26ab7fd2a3f64cec86a67b4b31fd571b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "455a185ce5f0408eacc7838ceffd3713",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0799e13abd404d9aaf7ade8657df520b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "665722cfb9b146ffa1de9fd8a04c2f13",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "20f4ed51458d4eb1be67feb895992ffd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_33 = ts_33.merge(ts_merge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "commonsense_qa {69}\n",
      "med_qa {72}\n",
      "medmc_qa {72}\n",
      "open_book_qa {67}\n",
      "strategy_qa {69}\n",
      "worldtree {67}\n"
     ]
    }
   ],
   "source": [
    "ts_33.number_generated_cots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fdee019b9c6d4f2f8a38c61c70448584",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e2b99fde126f47359fce3e5578e4457e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cfb2dbba445544bf9a1f7e71bc7820aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d7390a3fd9c2485d9f88af58163d8761",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "59a99f4c52a14f84bc78b9cbc803ef53",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "19d2979b17c7414aa6fb256c4ba9b7f8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating json from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_33.dump(\"thoughtsource_33.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.select_generated_cots(author=\"thoughtsource\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "91255d700ddd4415b1c03faf208e9f30",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d2e82c9a6d2b403582b36c6551df497d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "920e74532bd447d88090adc9d03aa5af",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8badc370a3b94c89baa6c8d81060ce41",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "55cfdc1bc3dd471eb2072e27e671350a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "20cc84a846694a9db813f7927e905f44",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/33 [00:00<?, ?ex/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "ts_merge = Collection.load_thoughtsource_33(load_pregenerated_cots=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 205,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33 = Collection.load_thoughtsource_33()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.select_generated_cots(cot_trigger = \"kojima-09\", reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.select_generated_cots(cot_trigger = \"kojima-03\", reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.select_generated_cots(instruction = [\"qa-01\", \"qa-05\", \"qa-08\", \"qa-09\"], reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ts_33.select_generated_cots(cot_trigger = \"zhou-01\", instruction=\"zhou-01-ins\", reverse=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.select_generated_cots(author=\"thoughtsource\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "commonsense_qa {61}\n",
      "med_qa {61}\n",
      "medmc_qa {61}\n",
      "open_book_qa {61}\n",
      "strategy_qa {61}\n",
      "worldtree {61}\n"
     ]
    }
   ],
   "source": [
    "ts_33.number_generated_cots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/kon/work/ThoughtSource/libs/cot/cot/stats.py:406: PerformanceWarning: indexing past lexsort depth may impact performance.\n",
      "  df.loc[dataset, (instruction + \"_\" + cot_trigger, model)] = v\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_e5766_row0_col21, #T_e5766_row1_col15, #T_e5766_row2_col15, #T_e5766_row3_col15, #T_e5766_row4_col15, #T_e5766_row5_col27, #T_e5766_row6_col15 {\n",
       "  font-weight: bold;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_e5766\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_e5766_level0_col0\" class=\"col_heading level0 col0\" colspan=\"6\">None_None</th>\n",
       "      <th id=\"T_e5766_level0_col6\" class=\"col_heading level0 col6\" colspan=\"6\">None_kojima-01</th>\n",
       "      <th id=\"T_e5766_level0_col12\" class=\"col_heading level0 col12\" colspan=\"6\">None_zhou-01</th>\n",
       "      <th id=\"T_e5766_level0_col18\" class=\"col_heading level0 col18\" colspan=\"6\">qa-10_None</th>\n",
       "      <th id=\"T_e5766_level0_col24\" class=\"col_heading level0 col24\" colspan=\"6\">qa-12_None</th>\n",
       "      <th id=\"T_e5766_level0_col30\" class=\"col_heading level0 col30\" colspan=\"6\">qa-13_None</th>\n",
       "      <th id=\"T_e5766_level0_col36\" class=\"col_heading level0 col36\" colspan=\"6\">qa-16_None</th>\n",
       "      <th id=\"T_e5766_level0_col42\" class=\"col_heading level0 col42\" colspan=\"6\">qa-17_None</th>\n",
       "      <th id=\"T_e5766_level0_col48\" class=\"col_heading level0 col48\" colspan=\"6\">refl-01_None</th>\n",
       "      <th id=\"T_e5766_level0_col54\" class=\"col_heading level0 col54\" colspan=\"6\">zhou-01-ins_None</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th class=\"blank level1\" >&nbsp;</th>\n",
       "      <th id=\"T_e5766_level1_col0\" class=\"col_heading level1 col0\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col1\" class=\"col_heading level1 col1\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col2\" class=\"col_heading level1 col2\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col3\" class=\"col_heading level1 col3\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col4\" class=\"col_heading level1 col4\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col5\" class=\"col_heading level1 col5\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col6\" class=\"col_heading level1 col6\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col7\" class=\"col_heading level1 col7\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col8\" class=\"col_heading level1 col8\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col9\" class=\"col_heading level1 col9\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col10\" class=\"col_heading level1 col10\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col11\" class=\"col_heading level1 col11\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col12\" class=\"col_heading level1 col12\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col13\" class=\"col_heading level1 col13\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col14\" class=\"col_heading level1 col14\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col15\" class=\"col_heading level1 col15\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col16\" class=\"col_heading level1 col16\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col17\" class=\"col_heading level1 col17\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col18\" class=\"col_heading level1 col18\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col19\" class=\"col_heading level1 col19\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col20\" class=\"col_heading level1 col20\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col21\" class=\"col_heading level1 col21\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col22\" class=\"col_heading level1 col22\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col23\" class=\"col_heading level1 col23\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col24\" class=\"col_heading level1 col24\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col25\" class=\"col_heading level1 col25\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col26\" class=\"col_heading level1 col26\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col27\" class=\"col_heading level1 col27\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col28\" class=\"col_heading level1 col28\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col29\" class=\"col_heading level1 col29\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col30\" class=\"col_heading level1 col30\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col31\" class=\"col_heading level1 col31\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col32\" class=\"col_heading level1 col32\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col33\" class=\"col_heading level1 col33\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col34\" class=\"col_heading level1 col34\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col35\" class=\"col_heading level1 col35\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col36\" class=\"col_heading level1 col36\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col37\" class=\"col_heading level1 col37\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col38\" class=\"col_heading level1 col38\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col39\" class=\"col_heading level1 col39\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col40\" class=\"col_heading level1 col40\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col41\" class=\"col_heading level1 col41\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col42\" class=\"col_heading level1 col42\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col43\" class=\"col_heading level1 col43\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col44\" class=\"col_heading level1 col44\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col45\" class=\"col_heading level1 col45\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col46\" class=\"col_heading level1 col46\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col47\" class=\"col_heading level1 col47\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col48\" class=\"col_heading level1 col48\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col49\" class=\"col_heading level1 col49\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col50\" class=\"col_heading level1 col50\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col51\" class=\"col_heading level1 col51\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col52\" class=\"col_heading level1 col52\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col53\" class=\"col_heading level1 col53\" >text-davinci-003</th>\n",
       "      <th id=\"T_e5766_level1_col54\" class=\"col_heading level1 col54\" >command-xlarge-nightly</th>\n",
       "      <th id=\"T_e5766_level1_col55\" class=\"col_heading level1 col55\" >flan-T5-xxl</th>\n",
       "      <th id=\"T_e5766_level1_col56\" class=\"col_heading level1 col56\" >gpt-3.5-turbo</th>\n",
       "      <th id=\"T_e5766_level1_col57\" class=\"col_heading level1 col57\" >gpt-4</th>\n",
       "      <th id=\"T_e5766_level1_col58\" class=\"col_heading level1 col58\" >text-davinci-002</th>\n",
       "      <th id=\"T_e5766_level1_col59\" class=\"col_heading level1 col59\" >text-davinci-003</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row0\" class=\"row_heading level0 row0\" >commonsense_qa</th>\n",
       "      <td id=\"T_e5766_row0_col0\" class=\"data row0 col0\" >0.52</td>\n",
       "      <td id=\"T_e5766_row0_col1\" class=\"data row0 col1\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col2\" class=\"data row0 col2\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col3\" class=\"data row0 col3\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col4\" class=\"data row0 col4\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col5\" class=\"data row0 col5\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col6\" class=\"data row0 col6\" >0.48</td>\n",
       "      <td id=\"T_e5766_row0_col7\" class=\"data row0 col7\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col8\" class=\"data row0 col8\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col9\" class=\"data row0 col9\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col10\" class=\"data row0 col10\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col11\" class=\"data row0 col11\" >0.67</td>\n",
       "      <td id=\"T_e5766_row0_col12\" class=\"data row0 col12\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col13\" class=\"data row0 col13\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col14\" class=\"data row0 col14\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col15\" class=\"data row0 col15\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col16\" class=\"data row0 col16\" >0.61</td>\n",
       "      <td id=\"T_e5766_row0_col17\" class=\"data row0 col17\" >0.67</td>\n",
       "      <td id=\"T_e5766_row0_col18\" class=\"data row0 col18\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col19\" class=\"data row0 col19\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col20\" class=\"data row0 col20\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col21\" class=\"data row0 col21\" >0.88</td>\n",
       "      <td id=\"T_e5766_row0_col22\" class=\"data row0 col22\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col23\" class=\"data row0 col23\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col24\" class=\"data row0 col24\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col25\" class=\"data row0 col25\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col26\" class=\"data row0 col26\" >0.58</td>\n",
       "      <td id=\"T_e5766_row0_col27\" class=\"data row0 col27\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col28\" class=\"data row0 col28\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col29\" class=\"data row0 col29\" >0.79</td>\n",
       "      <td id=\"T_e5766_row0_col30\" class=\"data row0 col30\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col31\" class=\"data row0 col31\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col32\" class=\"data row0 col32\" >0.64</td>\n",
       "      <td id=\"T_e5766_row0_col33\" class=\"data row0 col33\" >0.82</td>\n",
       "      <td id=\"T_e5766_row0_col34\" class=\"data row0 col34\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col35\" class=\"data row0 col35\" >0.67</td>\n",
       "      <td id=\"T_e5766_row0_col36\" class=\"data row0 col36\" >0.67</td>\n",
       "      <td id=\"T_e5766_row0_col37\" class=\"data row0 col37\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col38\" class=\"data row0 col38\" >0.64</td>\n",
       "      <td id=\"T_e5766_row0_col39\" class=\"data row0 col39\" >0.67</td>\n",
       "      <td id=\"T_e5766_row0_col40\" class=\"data row0 col40\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col41\" class=\"data row0 col41\" >0.58</td>\n",
       "      <td id=\"T_e5766_row0_col42\" class=\"data row0 col42\" >0.61</td>\n",
       "      <td id=\"T_e5766_row0_col43\" class=\"data row0 col43\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col44\" class=\"data row0 col44\" >0.61</td>\n",
       "      <td id=\"T_e5766_row0_col45\" class=\"data row0 col45\" >0.64</td>\n",
       "      <td id=\"T_e5766_row0_col46\" class=\"data row0 col46\" >0.55</td>\n",
       "      <td id=\"T_e5766_row0_col47\" class=\"data row0 col47\" >0.64</td>\n",
       "      <td id=\"T_e5766_row0_col48\" class=\"data row0 col48\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col49\" class=\"data row0 col49\" >0.85</td>\n",
       "      <td id=\"T_e5766_row0_col50\" class=\"data row0 col50\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col51\" class=\"data row0 col51\" >0.82</td>\n",
       "      <td id=\"T_e5766_row0_col52\" class=\"data row0 col52\" >0.70</td>\n",
       "      <td id=\"T_e5766_row0_col53\" class=\"data row0 col53\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col54\" class=\"data row0 col54\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col55\" class=\"data row0 col55\" >0.82</td>\n",
       "      <td id=\"T_e5766_row0_col56\" class=\"data row0 col56\" >0.79</td>\n",
       "      <td id=\"T_e5766_row0_col57\" class=\"data row0 col57\" >0.76</td>\n",
       "      <td id=\"T_e5766_row0_col58\" class=\"data row0 col58\" >0.73</td>\n",
       "      <td id=\"T_e5766_row0_col59\" class=\"data row0 col59\" >0.73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row1\" class=\"row_heading level0 row1\" >med_qa</th>\n",
       "      <td id=\"T_e5766_row1_col0\" class=\"data row1 col0\" >0.18</td>\n",
       "      <td id=\"T_e5766_row1_col1\" class=\"data row1 col1\" >0.21</td>\n",
       "      <td id=\"T_e5766_row1_col2\" class=\"data row1 col2\" >0.55</td>\n",
       "      <td id=\"T_e5766_row1_col3\" class=\"data row1 col3\" >0.64</td>\n",
       "      <td id=\"T_e5766_row1_col4\" class=\"data row1 col4\" >0.33</td>\n",
       "      <td id=\"T_e5766_row1_col5\" class=\"data row1 col5\" >0.30</td>\n",
       "      <td id=\"T_e5766_row1_col6\" class=\"data row1 col6\" >0.30</td>\n",
       "      <td id=\"T_e5766_row1_col7\" class=\"data row1 col7\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col8\" class=\"data row1 col8\" >0.52</td>\n",
       "      <td id=\"T_e5766_row1_col9\" class=\"data row1 col9\" >0.70</td>\n",
       "      <td id=\"T_e5766_row1_col10\" class=\"data row1 col10\" >0.12</td>\n",
       "      <td id=\"T_e5766_row1_col11\" class=\"data row1 col11\" >0.33</td>\n",
       "      <td id=\"T_e5766_row1_col12\" class=\"data row1 col12\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col13\" class=\"data row1 col13\" >0.12</td>\n",
       "      <td id=\"T_e5766_row1_col14\" class=\"data row1 col14\" >0.61</td>\n",
       "      <td id=\"T_e5766_row1_col15\" class=\"data row1 col15\" >0.73</td>\n",
       "      <td id=\"T_e5766_row1_col16\" class=\"data row1 col16\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col17\" class=\"data row1 col17\" >0.39</td>\n",
       "      <td id=\"T_e5766_row1_col18\" class=\"data row1 col18\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col19\" class=\"data row1 col19\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col20\" class=\"data row1 col20\" >0.42</td>\n",
       "      <td id=\"T_e5766_row1_col21\" class=\"data row1 col21\" >0.61</td>\n",
       "      <td id=\"T_e5766_row1_col22\" class=\"data row1 col22\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col23\" class=\"data row1 col23\" >0.36</td>\n",
       "      <td id=\"T_e5766_row1_col24\" class=\"data row1 col24\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col25\" class=\"data row1 col25\" >0.15</td>\n",
       "      <td id=\"T_e5766_row1_col26\" class=\"data row1 col26\" >0.55</td>\n",
       "      <td id=\"T_e5766_row1_col27\" class=\"data row1 col27\" >0.67</td>\n",
       "      <td id=\"T_e5766_row1_col28\" class=\"data row1 col28\" >0.36</td>\n",
       "      <td id=\"T_e5766_row1_col29\" class=\"data row1 col29\" >0.30</td>\n",
       "      <td id=\"T_e5766_row1_col30\" class=\"data row1 col30\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col31\" class=\"data row1 col31\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col32\" class=\"data row1 col32\" >0.42</td>\n",
       "      <td id=\"T_e5766_row1_col33\" class=\"data row1 col33\" >0.61</td>\n",
       "      <td id=\"T_e5766_row1_col34\" class=\"data row1 col34\" >0.30</td>\n",
       "      <td id=\"T_e5766_row1_col35\" class=\"data row1 col35\" >0.39</td>\n",
       "      <td id=\"T_e5766_row1_col36\" class=\"data row1 col36\" >0.33</td>\n",
       "      <td id=\"T_e5766_row1_col37\" class=\"data row1 col37\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col38\" class=\"data row1 col38\" >0.48</td>\n",
       "      <td id=\"T_e5766_row1_col39\" class=\"data row1 col39\" >0.64</td>\n",
       "      <td id=\"T_e5766_row1_col40\" class=\"data row1 col40\" >0.33</td>\n",
       "      <td id=\"T_e5766_row1_col41\" class=\"data row1 col41\" >0.36</td>\n",
       "      <td id=\"T_e5766_row1_col42\" class=\"data row1 col42\" >0.30</td>\n",
       "      <td id=\"T_e5766_row1_col43\" class=\"data row1 col43\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col44\" class=\"data row1 col44\" >0.52</td>\n",
       "      <td id=\"T_e5766_row1_col45\" class=\"data row1 col45\" >0.61</td>\n",
       "      <td id=\"T_e5766_row1_col46\" class=\"data row1 col46\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col47\" class=\"data row1 col47\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col48\" class=\"data row1 col48\" >0.21</td>\n",
       "      <td id=\"T_e5766_row1_col49\" class=\"data row1 col49\" >0.18</td>\n",
       "      <td id=\"T_e5766_row1_col50\" class=\"data row1 col50\" >0.48</td>\n",
       "      <td id=\"T_e5766_row1_col51\" class=\"data row1 col51\" >0.64</td>\n",
       "      <td id=\"T_e5766_row1_col52\" class=\"data row1 col52\" >0.27</td>\n",
       "      <td id=\"T_e5766_row1_col53\" class=\"data row1 col53\" >0.33</td>\n",
       "      <td id=\"T_e5766_row1_col54\" class=\"data row1 col54\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col55\" class=\"data row1 col55\" >0.21</td>\n",
       "      <td id=\"T_e5766_row1_col56\" class=\"data row1 col56\" >0.48</td>\n",
       "      <td id=\"T_e5766_row1_col57\" class=\"data row1 col57\" >0.61</td>\n",
       "      <td id=\"T_e5766_row1_col58\" class=\"data row1 col58\" >0.24</td>\n",
       "      <td id=\"T_e5766_row1_col59\" class=\"data row1 col59\" >0.36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row2\" class=\"row_heading level0 row2\" >medmc_qa</th>\n",
       "      <td id=\"T_e5766_row2_col0\" class=\"data row2 col0\" >0.27</td>\n",
       "      <td id=\"T_e5766_row2_col1\" class=\"data row2 col1\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col2\" class=\"data row2 col2\" >0.64</td>\n",
       "      <td id=\"T_e5766_row2_col3\" class=\"data row2 col3\" >0.79</td>\n",
       "      <td id=\"T_e5766_row2_col4\" class=\"data row2 col4\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col5\" class=\"data row2 col5\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col6\" class=\"data row2 col6\" >0.15</td>\n",
       "      <td id=\"T_e5766_row2_col7\" class=\"data row2 col7\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col8\" class=\"data row2 col8\" >0.58</td>\n",
       "      <td id=\"T_e5766_row2_col9\" class=\"data row2 col9\" >0.82</td>\n",
       "      <td id=\"T_e5766_row2_col10\" class=\"data row2 col10\" >0.33</td>\n",
       "      <td id=\"T_e5766_row2_col11\" class=\"data row2 col11\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col12\" class=\"data row2 col12\" >0.42</td>\n",
       "      <td id=\"T_e5766_row2_col13\" class=\"data row2 col13\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col14\" class=\"data row2 col14\" >0.61</td>\n",
       "      <td id=\"T_e5766_row2_col15\" class=\"data row2 col15\" >0.85</td>\n",
       "      <td id=\"T_e5766_row2_col16\" class=\"data row2 col16\" >0.45</td>\n",
       "      <td id=\"T_e5766_row2_col17\" class=\"data row2 col17\" >0.48</td>\n",
       "      <td id=\"T_e5766_row2_col18\" class=\"data row2 col18\" >0.27</td>\n",
       "      <td id=\"T_e5766_row2_col19\" class=\"data row2 col19\" >0.33</td>\n",
       "      <td id=\"T_e5766_row2_col20\" class=\"data row2 col20\" >0.64</td>\n",
       "      <td id=\"T_e5766_row2_col21\" class=\"data row2 col21\" >0.76</td>\n",
       "      <td id=\"T_e5766_row2_col22\" class=\"data row2 col22\" >0.39</td>\n",
       "      <td id=\"T_e5766_row2_col23\" class=\"data row2 col23\" >0.45</td>\n",
       "      <td id=\"T_e5766_row2_col24\" class=\"data row2 col24\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col25\" class=\"data row2 col25\" >0.42</td>\n",
       "      <td id=\"T_e5766_row2_col26\" class=\"data row2 col26\" >0.64</td>\n",
       "      <td id=\"T_e5766_row2_col27\" class=\"data row2 col27\" >0.76</td>\n",
       "      <td id=\"T_e5766_row2_col28\" class=\"data row2 col28\" >0.48</td>\n",
       "      <td id=\"T_e5766_row2_col29\" class=\"data row2 col29\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col30\" class=\"data row2 col30\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col31\" class=\"data row2 col31\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col32\" class=\"data row2 col32\" >0.52</td>\n",
       "      <td id=\"T_e5766_row2_col33\" class=\"data row2 col33\" >0.76</td>\n",
       "      <td id=\"T_e5766_row2_col34\" class=\"data row2 col34\" >0.45</td>\n",
       "      <td id=\"T_e5766_row2_col35\" class=\"data row2 col35\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col36\" class=\"data row2 col36\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col37\" class=\"data row2 col37\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col38\" class=\"data row2 col38\" >0.64</td>\n",
       "      <td id=\"T_e5766_row2_col39\" class=\"data row2 col39\" >0.85</td>\n",
       "      <td id=\"T_e5766_row2_col40\" class=\"data row2 col40\" >0.52</td>\n",
       "      <td id=\"T_e5766_row2_col41\" class=\"data row2 col41\" >0.39</td>\n",
       "      <td id=\"T_e5766_row2_col42\" class=\"data row2 col42\" >0.36</td>\n",
       "      <td id=\"T_e5766_row2_col43\" class=\"data row2 col43\" >0.39</td>\n",
       "      <td id=\"T_e5766_row2_col44\" class=\"data row2 col44\" >0.67</td>\n",
       "      <td id=\"T_e5766_row2_col45\" class=\"data row2 col45\" >0.67</td>\n",
       "      <td id=\"T_e5766_row2_col46\" class=\"data row2 col46\" >0.39</td>\n",
       "      <td id=\"T_e5766_row2_col47\" class=\"data row2 col47\" >0.42</td>\n",
       "      <td id=\"T_e5766_row2_col48\" class=\"data row2 col48\" >0.24</td>\n",
       "      <td id=\"T_e5766_row2_col49\" class=\"data row2 col49\" >0.27</td>\n",
       "      <td id=\"T_e5766_row2_col50\" class=\"data row2 col50\" >0.55</td>\n",
       "      <td id=\"T_e5766_row2_col51\" class=\"data row2 col51\" >0.76</td>\n",
       "      <td id=\"T_e5766_row2_col52\" class=\"data row2 col52\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col53\" class=\"data row2 col53\" >0.39</td>\n",
       "      <td id=\"T_e5766_row2_col54\" class=\"data row2 col54\" >0.33</td>\n",
       "      <td id=\"T_e5766_row2_col55\" class=\"data row2 col55\" >0.30</td>\n",
       "      <td id=\"T_e5766_row2_col56\" class=\"data row2 col56\" >0.48</td>\n",
       "      <td id=\"T_e5766_row2_col57\" class=\"data row2 col57\" >0.82</td>\n",
       "      <td id=\"T_e5766_row2_col58\" class=\"data row2 col58\" >0.33</td>\n",
       "      <td id=\"T_e5766_row2_col59\" class=\"data row2 col59\" >0.33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row3\" class=\"row_heading level0 row3\" >open_book_qa</th>\n",
       "      <td id=\"T_e5766_row3_col0\" class=\"data row3 col0\" >0.58</td>\n",
       "      <td id=\"T_e5766_row3_col1\" class=\"data row3 col1\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col2\" class=\"data row3 col2\" >0.85</td>\n",
       "      <td id=\"T_e5766_row3_col3\" class=\"data row3 col3\" >0.91</td>\n",
       "      <td id=\"T_e5766_row3_col4\" class=\"data row3 col4\" >0.64</td>\n",
       "      <td id=\"T_e5766_row3_col5\" class=\"data row3 col5\" >0.64</td>\n",
       "      <td id=\"T_e5766_row3_col6\" class=\"data row3 col6\" >0.42</td>\n",
       "      <td id=\"T_e5766_row3_col7\" class=\"data row3 col7\" >0.82</td>\n",
       "      <td id=\"T_e5766_row3_col8\" class=\"data row3 col8\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col9\" class=\"data row3 col9\" >0.94</td>\n",
       "      <td id=\"T_e5766_row3_col10\" class=\"data row3 col10\" >0.55</td>\n",
       "      <td id=\"T_e5766_row3_col11\" class=\"data row3 col11\" >0.58</td>\n",
       "      <td id=\"T_e5766_row3_col12\" class=\"data row3 col12\" >0.61</td>\n",
       "      <td id=\"T_e5766_row3_col13\" class=\"data row3 col13\" >0.82</td>\n",
       "      <td id=\"T_e5766_row3_col14\" class=\"data row3 col14\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col15\" class=\"data row3 col15\" >0.97</td>\n",
       "      <td id=\"T_e5766_row3_col16\" class=\"data row3 col16\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col17\" class=\"data row3 col17\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col18\" class=\"data row3 col18\" >0.58</td>\n",
       "      <td id=\"T_e5766_row3_col19\" class=\"data row3 col19\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col20\" class=\"data row3 col20\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col21\" class=\"data row3 col21\" >0.94</td>\n",
       "      <td id=\"T_e5766_row3_col22\" class=\"data row3 col22\" >0.55</td>\n",
       "      <td id=\"T_e5766_row3_col23\" class=\"data row3 col23\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col24\" class=\"data row3 col24\" >0.58</td>\n",
       "      <td id=\"T_e5766_row3_col25\" class=\"data row3 col25\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col26\" class=\"data row3 col26\" >0.85</td>\n",
       "      <td id=\"T_e5766_row3_col27\" class=\"data row3 col27\" >0.91</td>\n",
       "      <td id=\"T_e5766_row3_col28\" class=\"data row3 col28\" >0.52</td>\n",
       "      <td id=\"T_e5766_row3_col29\" class=\"data row3 col29\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col30\" class=\"data row3 col30\" >0.58</td>\n",
       "      <td id=\"T_e5766_row3_col31\" class=\"data row3 col31\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col32\" class=\"data row3 col32\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col33\" class=\"data row3 col33\" >0.88</td>\n",
       "      <td id=\"T_e5766_row3_col34\" class=\"data row3 col34\" >0.39</td>\n",
       "      <td id=\"T_e5766_row3_col35\" class=\"data row3 col35\" >0.61</td>\n",
       "      <td id=\"T_e5766_row3_col36\" class=\"data row3 col36\" >0.55</td>\n",
       "      <td id=\"T_e5766_row3_col37\" class=\"data row3 col37\" >0.67</td>\n",
       "      <td id=\"T_e5766_row3_col38\" class=\"data row3 col38\" >0.67</td>\n",
       "      <td id=\"T_e5766_row3_col39\" class=\"data row3 col39\" >0.85</td>\n",
       "      <td id=\"T_e5766_row3_col40\" class=\"data row3 col40\" >0.61</td>\n",
       "      <td id=\"T_e5766_row3_col41\" class=\"data row3 col41\" >0.64</td>\n",
       "      <td id=\"T_e5766_row3_col42\" class=\"data row3 col42\" >0.61</td>\n",
       "      <td id=\"T_e5766_row3_col43\" class=\"data row3 col43\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col44\" class=\"data row3 col44\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col45\" class=\"data row3 col45\" >0.91</td>\n",
       "      <td id=\"T_e5766_row3_col46\" class=\"data row3 col46\" >0.48</td>\n",
       "      <td id=\"T_e5766_row3_col47\" class=\"data row3 col47\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col48\" class=\"data row3 col48\" >0.61</td>\n",
       "      <td id=\"T_e5766_row3_col49\" class=\"data row3 col49\" >0.79</td>\n",
       "      <td id=\"T_e5766_row3_col50\" class=\"data row3 col50\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col51\" class=\"data row3 col51\" >0.88</td>\n",
       "      <td id=\"T_e5766_row3_col52\" class=\"data row3 col52\" >0.67</td>\n",
       "      <td id=\"T_e5766_row3_col53\" class=\"data row3 col53\" >0.64</td>\n",
       "      <td id=\"T_e5766_row3_col54\" class=\"data row3 col54\" >0.64</td>\n",
       "      <td id=\"T_e5766_row3_col55\" class=\"data row3 col55\" >0.76</td>\n",
       "      <td id=\"T_e5766_row3_col56\" class=\"data row3 col56\" >0.73</td>\n",
       "      <td id=\"T_e5766_row3_col57\" class=\"data row3 col57\" >0.94</td>\n",
       "      <td id=\"T_e5766_row3_col58\" class=\"data row3 col58\" >0.45</td>\n",
       "      <td id=\"T_e5766_row3_col59\" class=\"data row3 col59\" >0.82</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row4\" class=\"row_heading level0 row4\" >strategy_qa</th>\n",
       "      <td id=\"T_e5766_row4_col0\" class=\"data row4 col0\" >0.55</td>\n",
       "      <td id=\"T_e5766_row4_col1\" class=\"data row4 col1\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col2\" class=\"data row4 col2\" >0.55</td>\n",
       "      <td id=\"T_e5766_row4_col3\" class=\"data row4 col3\" >0.73</td>\n",
       "      <td id=\"T_e5766_row4_col4\" class=\"data row4 col4\" >0.39</td>\n",
       "      <td id=\"T_e5766_row4_col5\" class=\"data row4 col5\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col6\" class=\"data row4 col6\" >0.58</td>\n",
       "      <td id=\"T_e5766_row4_col7\" class=\"data row4 col7\" >0.70</td>\n",
       "      <td id=\"T_e5766_row4_col8\" class=\"data row4 col8\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col9\" class=\"data row4 col9\" >0.85</td>\n",
       "      <td id=\"T_e5766_row4_col10\" class=\"data row4 col10\" >0.42</td>\n",
       "      <td id=\"T_e5766_row4_col11\" class=\"data row4 col11\" >0.64</td>\n",
       "      <td id=\"T_e5766_row4_col12\" class=\"data row4 col12\" >0.24</td>\n",
       "      <td id=\"T_e5766_row4_col13\" class=\"data row4 col13\" >0.48</td>\n",
       "      <td id=\"T_e5766_row4_col14\" class=\"data row4 col14\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col15\" class=\"data row4 col15\" >0.91</td>\n",
       "      <td id=\"T_e5766_row4_col16\" class=\"data row4 col16\" >0.70</td>\n",
       "      <td id=\"T_e5766_row4_col17\" class=\"data row4 col17\" >0.64</td>\n",
       "      <td id=\"T_e5766_row4_col18\" class=\"data row4 col18\" >0.12</td>\n",
       "      <td id=\"T_e5766_row4_col19\" class=\"data row4 col19\" >0.58</td>\n",
       "      <td id=\"T_e5766_row4_col20\" class=\"data row4 col20\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col21\" class=\"data row4 col21\" >0.85</td>\n",
       "      <td id=\"T_e5766_row4_col22\" class=\"data row4 col22\" >0.39</td>\n",
       "      <td id=\"T_e5766_row4_col23\" class=\"data row4 col23\" >0.58</td>\n",
       "      <td id=\"T_e5766_row4_col24\" class=\"data row4 col24\" >0.09</td>\n",
       "      <td id=\"T_e5766_row4_col25\" class=\"data row4 col25\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col26\" class=\"data row4 col26\" >0.52</td>\n",
       "      <td id=\"T_e5766_row4_col27\" class=\"data row4 col27\" >0.76</td>\n",
       "      <td id=\"T_e5766_row4_col28\" class=\"data row4 col28\" >0.42</td>\n",
       "      <td id=\"T_e5766_row4_col29\" class=\"data row4 col29\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col30\" class=\"data row4 col30\" >0.12</td>\n",
       "      <td id=\"T_e5766_row4_col31\" class=\"data row4 col31\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col32\" class=\"data row4 col32\" >0.70</td>\n",
       "      <td id=\"T_e5766_row4_col33\" class=\"data row4 col33\" >0.85</td>\n",
       "      <td id=\"T_e5766_row4_col34\" class=\"data row4 col34\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col35\" class=\"data row4 col35\" >0.70</td>\n",
       "      <td id=\"T_e5766_row4_col36\" class=\"data row4 col36\" >0.33</td>\n",
       "      <td id=\"T_e5766_row4_col37\" class=\"data row4 col37\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col38\" class=\"data row4 col38\" >0.64</td>\n",
       "      <td id=\"T_e5766_row4_col39\" class=\"data row4 col39\" >0.85</td>\n",
       "      <td id=\"T_e5766_row4_col40\" class=\"data row4 col40\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col41\" class=\"data row4 col41\" >0.55</td>\n",
       "      <td id=\"T_e5766_row4_col42\" class=\"data row4 col42\" >0.30</td>\n",
       "      <td id=\"T_e5766_row4_col43\" class=\"data row4 col43\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col44\" class=\"data row4 col44\" >0.67</td>\n",
       "      <td id=\"T_e5766_row4_col45\" class=\"data row4 col45\" >0.88</td>\n",
       "      <td id=\"T_e5766_row4_col46\" class=\"data row4 col46\" >0.45</td>\n",
       "      <td id=\"T_e5766_row4_col47\" class=\"data row4 col47\" >0.58</td>\n",
       "      <td id=\"T_e5766_row4_col48\" class=\"data row4 col48\" >0.06</td>\n",
       "      <td id=\"T_e5766_row4_col49\" class=\"data row4 col49\" >0.48</td>\n",
       "      <td id=\"T_e5766_row4_col50\" class=\"data row4 col50\" >0.52</td>\n",
       "      <td id=\"T_e5766_row4_col51\" class=\"data row4 col51\" >0.70</td>\n",
       "      <td id=\"T_e5766_row4_col52\" class=\"data row4 col52\" >0.55</td>\n",
       "      <td id=\"T_e5766_row4_col53\" class=\"data row4 col53\" >0.64</td>\n",
       "      <td id=\"T_e5766_row4_col54\" class=\"data row4 col54\" >0.09</td>\n",
       "      <td id=\"T_e5766_row4_col55\" class=\"data row4 col55\" >0.61</td>\n",
       "      <td id=\"T_e5766_row4_col56\" class=\"data row4 col56\" >0.58</td>\n",
       "      <td id=\"T_e5766_row4_col57\" class=\"data row4 col57\" >0.79</td>\n",
       "      <td id=\"T_e5766_row4_col58\" class=\"data row4 col58\" >0.48</td>\n",
       "      <td id=\"T_e5766_row4_col59\" class=\"data row4 col59\" >0.64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row5\" class=\"row_heading level0 row5\" >worldtree</th>\n",
       "      <td id=\"T_e5766_row5_col0\" class=\"data row5 col0\" >0.61</td>\n",
       "      <td id=\"T_e5766_row5_col1\" class=\"data row5 col1\" >0.88</td>\n",
       "      <td id=\"T_e5766_row5_col2\" class=\"data row5 col2\" >0.94</td>\n",
       "      <td id=\"T_e5766_row5_col3\" class=\"data row5 col3\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col4\" class=\"data row5 col4\" >0.88</td>\n",
       "      <td id=\"T_e5766_row5_col5\" class=\"data row5 col5\" >0.94</td>\n",
       "      <td id=\"T_e5766_row5_col6\" class=\"data row5 col6\" >0.61</td>\n",
       "      <td id=\"T_e5766_row5_col7\" class=\"data row5 col7\" >0.79</td>\n",
       "      <td id=\"T_e5766_row5_col8\" class=\"data row5 col8\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col9\" class=\"data row5 col9\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col10\" class=\"data row5 col10\" >0.79</td>\n",
       "      <td id=\"T_e5766_row5_col11\" class=\"data row5 col11\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col12\" class=\"data row5 col12\" >0.76</td>\n",
       "      <td id=\"T_e5766_row5_col13\" class=\"data row5 col13\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col14\" class=\"data row5 col14\" >0.94</td>\n",
       "      <td id=\"T_e5766_row5_col15\" class=\"data row5 col15\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col16\" class=\"data row5 col16\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col17\" class=\"data row5 col17\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col18\" class=\"data row5 col18\" >0.73</td>\n",
       "      <td id=\"T_e5766_row5_col19\" class=\"data row5 col19\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col20\" class=\"data row5 col20\" >0.88</td>\n",
       "      <td id=\"T_e5766_row5_col21\" class=\"data row5 col21\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col22\" class=\"data row5 col22\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col23\" class=\"data row5 col23\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col24\" class=\"data row5 col24\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col25\" class=\"data row5 col25\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col26\" class=\"data row5 col26\" >0.88</td>\n",
       "      <td id=\"T_e5766_row5_col27\" class=\"data row5 col27\" >1.00</td>\n",
       "      <td id=\"T_e5766_row5_col28\" class=\"data row5 col28\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col29\" class=\"data row5 col29\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col30\" class=\"data row5 col30\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col31\" class=\"data row5 col31\" >0.79</td>\n",
       "      <td id=\"T_e5766_row5_col32\" class=\"data row5 col32\" >0.94</td>\n",
       "      <td id=\"T_e5766_row5_col33\" class=\"data row5 col33\" >1.00</td>\n",
       "      <td id=\"T_e5766_row5_col34\" class=\"data row5 col34\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col35\" class=\"data row5 col35\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col36\" class=\"data row5 col36\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col37\" class=\"data row5 col37\" >0.64</td>\n",
       "      <td id=\"T_e5766_row5_col38\" class=\"data row5 col38\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col39\" class=\"data row5 col39\" >0.94</td>\n",
       "      <td id=\"T_e5766_row5_col40\" class=\"data row5 col40\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col41\" class=\"data row5 col41\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col42\" class=\"data row5 col42\" >0.64</td>\n",
       "      <td id=\"T_e5766_row5_col43\" class=\"data row5 col43\" >0.79</td>\n",
       "      <td id=\"T_e5766_row5_col44\" class=\"data row5 col44\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col45\" class=\"data row5 col45\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col46\" class=\"data row5 col46\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col47\" class=\"data row5 col47\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col48\" class=\"data row5 col48\" >0.79</td>\n",
       "      <td id=\"T_e5766_row5_col49\" class=\"data row5 col49\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col50\" class=\"data row5 col50\" >0.88</td>\n",
       "      <td id=\"T_e5766_row5_col51\" class=\"data row5 col51\" >1.00</td>\n",
       "      <td id=\"T_e5766_row5_col52\" class=\"data row5 col52\" >0.76</td>\n",
       "      <td id=\"T_e5766_row5_col53\" class=\"data row5 col53\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col54\" class=\"data row5 col54\" >0.82</td>\n",
       "      <td id=\"T_e5766_row5_col55\" class=\"data row5 col55\" >0.85</td>\n",
       "      <td id=\"T_e5766_row5_col56\" class=\"data row5 col56\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col57\" class=\"data row5 col57\" >0.97</td>\n",
       "      <td id=\"T_e5766_row5_col58\" class=\"data row5 col58\" >0.91</td>\n",
       "      <td id=\"T_e5766_row5_col59\" class=\"data row5 col59\" >0.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_e5766_level0_row6\" class=\"row_heading level0 row6\" >Average</th>\n",
       "      <td id=\"T_e5766_row6_col0\" class=\"data row6 col0\" >0.45</td>\n",
       "      <td id=\"T_e5766_row6_col1\" class=\"data row6 col1\" >0.62</td>\n",
       "      <td id=\"T_e5766_row6_col2\" class=\"data row6 col2\" >0.71</td>\n",
       "      <td id=\"T_e5766_row6_col3\" class=\"data row6 col3\" >0.79</td>\n",
       "      <td id=\"T_e5766_row6_col4\" class=\"data row6 col4\" >0.55</td>\n",
       "      <td id=\"T_e5766_row6_col5\" class=\"data row6 col5\" >0.60</td>\n",
       "      <td id=\"T_e5766_row6_col6\" class=\"data row6 col6\" >0.42</td>\n",
       "      <td id=\"T_e5766_row6_col7\" class=\"data row6 col7\" >0.62</td>\n",
       "      <td id=\"T_e5766_row6_col8\" class=\"data row6 col8\" >0.69</td>\n",
       "      <td id=\"T_e5766_row6_col9\" class=\"data row6 col9\" >0.83</td>\n",
       "      <td id=\"T_e5766_row6_col10\" class=\"data row6 col10\" >0.49</td>\n",
       "      <td id=\"T_e5766_row6_col11\" class=\"data row6 col11\" >0.58</td>\n",
       "      <td id=\"T_e5766_row6_col12\" class=\"data row6 col12\" >0.51</td>\n",
       "      <td id=\"T_e5766_row6_col13\" class=\"data row6 col13\" >0.58</td>\n",
       "      <td id=\"T_e5766_row6_col14\" class=\"data row6 col14\" >0.70</td>\n",
       "      <td id=\"T_e5766_row6_col15\" class=\"data row6 col15\" >0.86</td>\n",
       "      <td id=\"T_e5766_row6_col16\" class=\"data row6 col16\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col17\" class=\"data row6 col17\" >0.63</td>\n",
       "      <td id=\"T_e5766_row6_col18\" class=\"data row6 col18\" >0.44</td>\n",
       "      <td id=\"T_e5766_row6_col19\" class=\"data row6 col19\" >0.60</td>\n",
       "      <td id=\"T_e5766_row6_col20\" class=\"data row6 col20\" >0.67</td>\n",
       "      <td id=\"T_e5766_row6_col21\" class=\"data row6 col21\" >0.83</td>\n",
       "      <td id=\"T_e5766_row6_col22\" class=\"data row6 col22\" >0.53</td>\n",
       "      <td id=\"T_e5766_row6_col23\" class=\"data row6 col23\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col24\" class=\"data row6 col24\" >0.46</td>\n",
       "      <td id=\"T_e5766_row6_col25\" class=\"data row6 col25\" >0.63</td>\n",
       "      <td id=\"T_e5766_row6_col26\" class=\"data row6 col26\" >0.67</td>\n",
       "      <td id=\"T_e5766_row6_col27\" class=\"data row6 col27\" >0.80</td>\n",
       "      <td id=\"T_e5766_row6_col28\" class=\"data row6 col28\" >0.58</td>\n",
       "      <td id=\"T_e5766_row6_col29\" class=\"data row6 col29\" >0.64</td>\n",
       "      <td id=\"T_e5766_row6_col30\" class=\"data row6 col30\" >0.48</td>\n",
       "      <td id=\"T_e5766_row6_col31\" class=\"data row6 col31\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col32\" class=\"data row6 col32\" >0.66</td>\n",
       "      <td id=\"T_e5766_row6_col33\" class=\"data row6 col33\" >0.82</td>\n",
       "      <td id=\"T_e5766_row6_col34\" class=\"data row6 col34\" >0.57</td>\n",
       "      <td id=\"T_e5766_row6_col35\" class=\"data row6 col35\" >0.60</td>\n",
       "      <td id=\"T_e5766_row6_col36\" class=\"data row6 col36\" >0.52</td>\n",
       "      <td id=\"T_e5766_row6_col37\" class=\"data row6 col37\" >0.54</td>\n",
       "      <td id=\"T_e5766_row6_col38\" class=\"data row6 col38\" >0.65</td>\n",
       "      <td id=\"T_e5766_row6_col39\" class=\"data row6 col39\" >0.80</td>\n",
       "      <td id=\"T_e5766_row6_col40\" class=\"data row6 col40\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col41\" class=\"data row6 col41\" >0.56</td>\n",
       "      <td id=\"T_e5766_row6_col42\" class=\"data row6 col42\" >0.47</td>\n",
       "      <td id=\"T_e5766_row6_col43\" class=\"data row6 col43\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col44\" class=\"data row6 col44\" >0.68</td>\n",
       "      <td id=\"T_e5766_row6_col45\" class=\"data row6 col45\" >0.78</td>\n",
       "      <td id=\"T_e5766_row6_col46\" class=\"data row6 col46\" >0.49</td>\n",
       "      <td id=\"T_e5766_row6_col47\" class=\"data row6 col47\" >0.59</td>\n",
       "      <td id=\"T_e5766_row6_col48\" class=\"data row6 col48\" >0.43</td>\n",
       "      <td id=\"T_e5766_row6_col49\" class=\"data row6 col49\" >0.57</td>\n",
       "      <td id=\"T_e5766_row6_col50\" class=\"data row6 col50\" >0.65</td>\n",
       "      <td id=\"T_e5766_row6_col51\" class=\"data row6 col51\" >0.80</td>\n",
       "      <td id=\"T_e5766_row6_col52\" class=\"data row6 col52\" >0.54</td>\n",
       "      <td id=\"T_e5766_row6_col53\" class=\"data row6 col53\" >0.61</td>\n",
       "      <td id=\"T_e5766_row6_col54\" class=\"data row6 col54\" >0.47</td>\n",
       "      <td id=\"T_e5766_row6_col55\" class=\"data row6 col55\" >0.59</td>\n",
       "      <td id=\"T_e5766_row6_col56\" class=\"data row6 col56\" >0.67</td>\n",
       "      <td id=\"T_e5766_row6_col57\" class=\"data row6 col57\" >0.81</td>\n",
       "      <td id=\"T_e5766_row6_col58\" class=\"data row6 col58\" >0.53</td>\n",
       "      <td id=\"T_e5766_row6_col59\" class=\"data row6 col59\" >0.63</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f7dd08dae60>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate every generated CoT in the collection and render the scores as a table.\n",
    "# The result is named `evaluation` (not `eval`) so the builtin eval() is not\n",
    "# shadowed for the rest of the kernel session.\n",
    "evaluation = ts_33.evaluate()\n",
    "table = evaluation_as_table(evaluation)\n",
    "table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cot import Collection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package punkt to /home/kon/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import cot\n",
    "\n",
    "# Locate the dataset relative to the installed `cot` package instead of a\n",
    "# machine-specific absolute path, so the notebook runs on any checkout.\n",
    "# Assumes the JSON lives at cot/datasets/thoughtsource/ inside the package\n",
    "# (true for an editable install of libs/cot) -- TODO confirm for other installs.\n",
    "ts_33_paper_path = Path(cot.__file__).parent / \"datasets\" / \"thoughtsource\" / \"thoughtsource_33_paper.json\"\n",
    "ts_33 = Collection.from_json(str(ts_33_paper_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "commonsense_qa {60}\n",
      "med_qa {60}\n",
      "medmc_qa {60}\n",
      "open_book_qa {60}\n",
      "strategy_qa {60}\n",
      "worldtree {60}\n"
     ]
    }
   ],
   "source": [
    "ts_33.number_generated_cots()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the generated-CoT selection with both filters passed as None.\n",
    "# NOTE(review): the call's return value is not used, so it presumably narrows\n",
    "# ts_33 in place; how None is interpreted by each filter should be confirmed\n",
    "# against Collection.select_generated_cots.\n",
    "ts_33.select_generated_cots(cot_trigger=None, instruction=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.dump(\"thoughtsource_33_paper.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_33.evaluate(overwrite=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
